다음 소스를 이용
https://github.com/tristangoossens/php-sitemap-generator
sitemap-config.php
https://www.your-domain-name.com/ or http://www.your-domain-name.com/
"SITE_URL" => "https://macaronics.net/",
// Boolean for crawling external links.
// *Domain = https://www.student-laptop.nl* , *Link = https://www.google.com*
"ALLOW_EXTERNAL_LINKS" => false,
// Boolean for crawling element id links.
// will not be crawled when this option is set to false
"ALLOW_ELEMENT_LINKS" => false,
// If set the crawler will only index the anchor tags with the given id.
// If you wish to crawl all links set the value to ""
// When CRAWL_ANCHORS_WITH_ID is set to "internal-link" this link will be crawled
// but will not be crawled.
"CRAWL_ANCHORS_WITH_ID" => "",
// Array with absolute links or keywords for the pages to skip when crawling the given SITE_URL.
// https://student-laptop.nl/info/laptops or you can just input student-laptop.nl/info/ and it will not crawl anything in that directory
// Try to be as specific as you can so you dont skip 300 pages
"KEYWORDS_TO_SKIP" => array(),
// Location + filename where the sitemap will be saved.
"SAVE_LOC" => "sitemap.xml",
// Static priority value for sitemap
"PRIORITY" => 1,
// Static update frequency
"CHANGE_FREQUENCY" => "daily",
// Date changed (today's date)
"LAST_UPDATED" => date('Y-m-d'),
);
sitemap-generator.php
config = $conf;
$this->scanned = [];
$this->site_url_base = parse_url($this->config['SITE_URL'])['scheme'] . "://" . parse_url($this->config['SITE_URL'])['host'];
$this->sitemap_file = fopen($this->config['SAVE_LOC'], "w");
}
public function GenerateSitemap($list_url, $list_date)
{
// Call the recursive crawl function with the start url.
//$this->crawlPage($this->config['SITE_URL']);
// Generate a sitemap with the scanned pages.
$this->generateFile($list_url, $list_date);
}
// Get the html content of a page and return it as a dom object
private function getHtml($url)
{
// Get html from the given page
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
$html = curl_exec($curl);
curl_close($curl);
//Load the html and store it into a DOM object
$dom = new DOMDocument();
@$dom->loadHTML($html);
return $dom;
}
// Recursive function that crawls a page's anchor tags and store them in the scanned array.
private function crawlPage($page_url)
{
$url = filter_var($page_url, FILTER_SANITIZE_URL);
// Check if the url is invalid or if the page is already scanned;
if (in_array($url, $this->scanned) || !filter_var($page_url, FILTER_VALIDATE_URL)) {
return;
}
// Add the page url to the scanned array
array_push($this->scanned, $page_url);
// Get the html content from the
$html = $this->getHtml($url);
$anchors = $html->getElementsByTagName('a');
// Loop through all anchor tags on the page
foreach ($anchors as $a) {
$next_url = $a->getAttribute('href');
// Check if there is a anchor ID set in the config.
if ($this->config['CRAWL_ANCHORS_WITH_ID'] != "") {
// Check if the id is set and matches the config setting, else it will move on to the next anchor
if ($a->getAttribute('id') != "" || $a->getAttribute('id') == $this->config['CRAWL_ANCHORS_WITH_ID']) {
continue;
}
}
// Split page url into base and extra parameters
$base_page_url = explode("?", $page_url)[0];
if (!$this->config['ALLOW_ELEMENT_LINKS']) {
// Skip the url if it starts with a # or is equal to root.
if (substr($next_url, 0, 1) == "#" || $next_url == "/") {
continue;
}
}
// Check if the given url is external, if yes it will skip the iteration
// This code will only run if you set ALLOW_EXTERNAL_LINKS to false in the config.
if (!$this->config['ALLOW_EXTERNAL_LINKS']) {
$parsed_url = parse_url($next_url);
if (isset($parsed_url['host'])) {
if ($parsed_url['host'] != parse_url($this->config['SITE_URL'])['host']) {
continue;
}
}
}
// Check if the link is absolute or relative.
if (substr($next_url, 0, 7) != "http://" && substr($next_url, 0, 8) != "https://") {
$next_url = $this->convertRelativeToAbsolute($base_page_url, $next_url);
}
// Check if the next link contains any of the pages to skip. If true, the loop will move on to the next iteration.
$found = false;
foreach ($this->config['KEYWORDS_TO_SKIP'] as $skip) {
if (strpos($next_url, $skip) || $next_url === $skip) {
$found = true;
}
}
// Call the function again with the new URL
if (!$found) {
$this->crawlPage($next_url);
}
}
}
// Convert a relative link to a absolute link
// Example: Relative /articles
// Absolute https://student-laptop.nl/articles
private function convertRelativeToAbsolute($page_base_url, $link)
{
$first_character = substr($link, 0, 1);
if ($first_character == "?" || $first_character == "#") {
return $page_base_url . $link;
} else if ($first_character != "/") {
return $this->site_url_base . "/" . $link;
} else {
return $this->site_url_base . $link;
}
}
// Function to generate a Sitemap with the given pages array where the script has run through
private function generateFile($pages , $date)
{
$xml = '
';
// Print the amount of pages
//echo count($pages);
//$this->config['LAST_UPDATED']
for($i=0; $i< count($pages); $i++) {
$xml .= "". $pages[$i]. "
" . $date[$i] . "
" . $this->config['CHANGE_FREQUENCY'] . "
" . $this->config['PRIORITY'] . "";
}
$xml .= "";
$xml = str_replace('&', '&', $xml);
// Format string to XML
$dom = new DOMDocument;
$dom->preserveWhiteSpace = FALSE;
$dom->loadXML($xml);
$dom->formatOutput = TRUE;
// Write XML to file and close it
fwrite($this->sitemap_file, $dom->saveXML());
fclose($this->sitemap_file);
}
}
모델 에서 다음과 같이 가져온다.
function selectSitemapList(){
$sql ="select board_id, DATE_FORMAT(reg_date, '%Y-%m-%dT%TZ') AS reg_date, board_subject from ci_board order by board_id desc Limit 0 , 500";
$result=$this->db->query($sql);
return $result->result();
}
컨트롤
$sitemapList =$this->auth_m->selectSitemapList();
$sitemapUrl=$this->config->base_url()."index.php";
$list_url = [];
$list_date = [];
foreach( $sitemapList as $board){
//다음과 같은 형식의 sitemapurl 생성 : https://macaronics.net/index.php/m01/jsp/view/1738
$sitemap_URL= $sitemapUrl."/".url_name($board->board_subject)."/".$board->board_subject."/view/".$board->board_id;
array_push($list_url, $sitemap_URL);
array_push($list_date, $board->reg_date);
}
include "/home1/macaronics/public_html/aaa/sitemap-generator.php";
$smg = new SitemapGenerator(include("/home1/macaronics/public_html/aaa/sitemap-config.php"));
$smg->GenerateSitemap($list_url,$list_date);
=>xml 파일로 생성
private function generateFile($pages , $date)
{
$xml = '
';
// Print the amount of pages
//echo count($pages);
//$this->config['LAST_UPDATED']
for($i=0; $i< count($pages); $i++) {
$xml .= "". $pages[$i]. "
" . $date[$i] . "
" . $this->config['CHANGE_FREQUENCY'] . "
" . $this->config['PRIORITY'] . "";
}
$xml .= "";
$xml = str_replace('&', '&', $xml);
// Format string to XML
$dom = new DOMDocument;
$dom->preserveWhiteSpace = FALSE;
$dom->loadXML($xml);
$dom->formatOutput = TRUE;
// Write XML to file and close it
fwrite($this->sitemap_file, $dom->saveXML());
fclose($this->sitemap_file);
}
작업 스케쥴을 하던지
http://www.ciboard.co.kr/tiptech/p/340
https://www.glennstovall.com/writing-cron-job-in-codeigniter/
특정 페이지에 코드를 삽입해서 주기적으로 sitemap.xml 생성한다.
혹은, 게시판 등록시 코드를 삽입한다.














댓글 ( 4)
댓글 남기기